package com.daervin.svc.parser;

import com.daervin.svc.common.constants.Category;
import com.daervin.svc.common.constants.Constants;
import com.daervin.svc.common.dto.NewsDTO;
import com.daervin.svc.common.utils.DateTimeUtils;
import com.daervin.svc.common.utils.NumberUtil;
import org.apache.log4j.Logger;
import org.springframework.util.StringUtils;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.selector.Html;
import us.codecraft.webmagic.selector.Selectable;

import java.util.ArrayList;
import java.util.Calendar;
import java.util.List;

import static com.daervin.svc.common.constants.SourceEnum.CHINA_VENTURE;

/**
 * Parser for the ChinaVenture news listing page.
 *
 * <p>{@link #listProcess(Page)} selects the nodes of the {@code common_fastinfo_list}
 * list, turns each one into a {@link NewsDTO} and publishes the resulting list on the
 * page under {@link Constants#PARSER_RESULT}. Entries lacking a title, a description or
 * a publish time are skipped.
 *
 * @author daervin
 * @version 1.0.0
 * @since 1.0.0
 */
public class ChinaVentureParser extends RootParser {
    private static final Logger LOGGER = Logger.getLogger(ChinaVentureParser.class);

    /** Host name used to recognise links that already point at the site. */
    private static final String SITE_HOST = "www.chinaventure.com.cn";
    /** Origin prepended to site-relative links. */
    private static final String SITE_ORIGIN = "https://" + SITE_HOST;
    /** Marker stripped from titles before they are stored. */
    private static final String FIRST_RELEASE_TAG = "【首发】";
    /** Suffix of a publish time expressed as "N minutes ago". */
    private static final String MINUTES_AGO = "分钟前";
    /** Suffix of a publish time expressed as "N hours ago". */
    private static final String HOURS_AGO = "小时前";

    /**
     * @param url URL handed to the {@link RootParser} constructor
     */
    public ChinaVentureParser(String url) {
        super(url);
    }

    /**
     * Extracts the news entries from the listing page and stores them on the page as a
     * {@code List<NewsDTO>} under {@link Constants#PARSER_RESULT}. The list is always
     * stored, even when it is empty.
     *
     * <p>A failure while handling one entry is logged and does not stop the remaining
     * entries from being processed.
     *
     * @param page the downloaded listing page
     */
    @Override
    public void listProcess(Page page) {
        Html html = page.getHtml();
        List<Selectable> newsViewList = html.xpath("//ul[@class=\"common_fastinfo_list\"]/").nodes();
        List<NewsDTO> newsList = new ArrayList<>();
        for (Selectable newsView : newsViewList) {
            try {
                NewsDTO news = parseItem(newsView);
                if (news != null) {
                    newsList.add(news);
                }
            } catch (Exception e) {
                // The logger records both the message and the stack trace, so no
                // additional System.err output is needed.
                LOGGER.error("ChinaVentureParser error", e);
            }
        }

        page.putField(Constants.PARSER_RESULT, newsList);
    }

    /**
     * Builds a {@link NewsDTO} from a single list node.
     *
     * @param newsView node selected from the listing
     * @return the populated item, or {@code null} when the node has no title,
     *         description or publish time, or its relative publish time is not numeric
     */
    private static NewsDTO parseItem(Selectable newsView) {
        String title = newsView.xpath("//div[@class=\"info\"]/h1/a/text()").get();
        if (title != null) {
            // Literal replacement; no regular expression is required here.
            title = title.replace(FIRST_RELEASE_TAG, "");
        }
        String links = toAbsoluteLink(newsView.xpath("//div[@class=\"info\"]/h1/a").links().get());
        String desc = newsView.xpath("//div[@class=\"info\"]/p/text()").get();
        String publishTime = newsView.xpath("//div[@class='releasetime']/text()").get();
        if (StringUtils.isEmpty(title) || StringUtils.isEmpty(desc) || StringUtils.isEmpty(publishTime)) {
            return null;
        }

        Calendar belongDate = Calendar.getInstance();
        if (!applyRelativeOffset(belongDate, publishTime)) {
            return null;
        }

        NewsDTO news = new NewsDTO();
        news.setTitle(title);
        news.setDesc(desc);
        news.setBelongDate(DateTimeUtils.longParse(belongDate.getTime()));
        news.setCategory(CHINA_VENTURE.category);
        // Titles mentioning financing, lead investment or acquisition override the
        // source's default category. The enum ordinal is what gets stored.
        if (title.contains("融资") || title.contains("领投") || title.contains("收购")) {
            news.setCategory(Category.FINANCE.ordinal());
        }
        news.setAnnouncer(CHINA_VENTURE.announcer);
        news.setLinks(links);
        return news;
    }

    /**
     * Moves {@code when} back by the amount described in a relative publish time such as
     * "5分钟前" or "2小时前". Any other text leaves {@code when} untouched, so the entry
     * keeps the time at which the calendar was created.
     *
     * @param when        calendar to adjust in place
     * @param publishTime non-empty publish-time text taken from the page
     * @return {@code false} when a relative suffix is present but the remaining text
     *         cannot be parsed as an integer; {@code true} otherwise
     */
    private static boolean applyRelativeOffset(Calendar when, String publishTime) {
        if (publishTime.contains(MINUTES_AGO)) {
            // Trim so that whitespace around the text node does not defeat parsing.
            Integer minutes = NumberUtil.safeParseNumber(
                    publishTime.replace(MINUTES_AGO, "").trim(), Integer.class);
            if (minutes == null) {
                return false;
            }
            when.add(Calendar.MINUTE, -minutes);
            return true;
        }
        if (publishTime.contains(HOURS_AGO)) {
            Integer hours = NumberUtil.safeParseNumber(
                    publishTime.replace(HOURS_AGO, "").trim(), Integer.class);
            if (hours == null) {
                return false;
            }
            when.add(Calendar.HOUR_OF_DAY, -hours);
        }
        return true;
    }

    /**
     * Turns a site-relative link into an absolute one.
     *
     * <p>Links that already mention the site host, or that are absolute
     * ({@code http://}, {@code https://}), are returned unchanged; protocol-relative
     * links ({@code //host/...}) only gain the {@code https:} scheme. Previously any
     * link not containing the site host was prefixed with the site origin, which
     * corrupted absolute links to other hosts.
     *
     * @param link link extracted from the page, possibly {@code null}
     * @return the absolute link, or {@code null} when {@code link} is {@code null}
     */
    private static String toAbsoluteLink(String link) {
        if (link == null || link.contains(SITE_HOST)) {
            return link;
        }
        if (link.startsWith("http://") || link.startsWith("https://")) {
            return link;
        }
        if (link.startsWith("//")) {
            return "https:" + link;
        }
        if (link.isEmpty() || link.startsWith("/")) {
            return SITE_ORIGIN + link;
        }
        // Relative path without a leading slash: add the separator so the path is
        // not fused onto the host name.
        return SITE_ORIGIN + "/" + link;
    }
}
